﻿using NPOI.HSSF.UserModel;
using NPOI.SS.UserModel;
using FC.Utils;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using FC.Data.ViewModels.Contests;
using System.Diagnostics;
using NSoup;
using NSoup.Nodes;
using NSoup.Select;

namespace HtmlReader
{
    class Program
    {
        /// <summary>
        /// Entry point: downloads PAT Basic practice problems 1001 to 1065 from patest.cn,
        /// extracts each problem's metadata, statement sections and sample data, and writes
        /// the collected rows to an .xls workbook with a "Problems" and an "Examples" sheet.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string baseurl = "https://www.patest.cn/contests/pat-b-practise/";
            const string outputPath = @"E:\Export\PATB_1_65.xls";

            // Headings that start a new section of the problem statement. A heading may stand
            // alone or be followed by section text in the same element.
            string[] sectionHeadings = { "输入格式：", "输出格式：", "输入样例：", "输出样例：" };

            List<ExportProblemModel> problems = new List<ExportProblemModel>();
            List<ExportExampleModel> examples = new List<ExportExampleModel>();
            Stopwatch time = Stopwatch.StartNew();

            for (int i = 1; i < 66; i++)
            {
                int problemNo = 1000 + i;
                try
                {
                    string url = baseurl + problemNo;

                    // WebClient is IDisposable; dispose it as soon as the page has been fetched.
                    string pageHtml;
                    using (WebClient client = new WebClient())
                    {
                        // Credentials used to authenticate the request, should the server ask for them.
                        client.Credentials = CredentialCache.DefaultCredentials;
                        client.Encoding = Encoding.UTF8;
                        client.Headers["User-Agent"] = "blah";
                        // The page is decoded as UTF-8; a GB2312 page would need a different encoding here.
                        pageHtml = Encoding.UTF8.GetString(client.DownloadData(url));
                    }

                    ExportProblemModel problem = new ExportProblemModel();
                    ExportExampleModel example = new ExportExampleModel();

                    Document document = NSoupClient.Parse(pageHtml);
                    var problemInfos = document.Select("#problemInfo .value");

                    problem.Link = url;
                    // Drops the first 6 characters of the heading - presumably a number prefix
                    // such as "1001. "; TODO confirm against the live page.
                    problem.Title = document.Select("#body > h1")[0].Text().Substring(6);

                    // Each info value is "<value> <unit...>"; only the part before the first space is kept.
                    if (problemInfos.Count > 0)
                        problem.TimeLimit = problemInfos[0].Text().Split(' ')[0];
                    if (problemInfos.Count > 1)
                        problem.MemoryLimit = problemInfos[1].Text().Split(' ')[0];
                    if (problemInfos.Count > 2)
                        problem.CodeLengthRestriction = problemInfos[2].Text().Split(' ')[0];
                    if (problemInfos.Count > 3)
                        problem.TestProcedure = problemInfos[3].Text().Split(' ')[0];
                    if (problemInfos.Count > 4)
                        problem.Author = problemInfos[4].Text().Split(' ')[0];

                    // Split the statement into sections: every heading closes the section
                    // collected so far and starts a new one with whatever follows the heading.
                    List<string> sections = new List<string>();
                    string current = "";
                    foreach (string line in GetText(document.Select("#problemContent > *")))
                    {
                        string heading = sectionHeadings.FirstOrDefault(
                            h => line.StartsWith(h, StringComparison.Ordinal));
                        if (heading != null)
                        {
                            sections.Add(current);
                            current = line.Substring(heading.Length);
                        }
                        else
                        {
                            current += (string.IsNullOrWhiteSpace(current) ? "" : "\n") + line;
                        }
                    }
                    sections.Add(current);

                    // Sections are mapped by position, i.e. in the order they appear on the page.
                    problem.Description = sections[0];
                    if (sections.Count > 1)
                        problem.InputFormat = sections[1];
                    if (sections.Count > 2)
                        problem.OutputFormat = sections[2];
                    if (sections.Count > 3)
                        example.InputExample = sections[3];
                    if (sections.Count > 4)
                        example.OutputExample = sections[4];

                    problem.CollectionCode = "PATB";
                    problem.Id = "A5AFA617-B1C3-474F-PATB-D4ECB6F" + (10000 + i).ToString();
                    problem.No = problemNo.ToString();
                    problem.PassCount = "0";
                    problem.PassRate = "0";
                    problem.Score = "100";
                    problem.SubmitCount = "0";
                    problem.Tips = "";
                    problem.Convention = "";
                    example.Id = "B5BFB637-A1C3-4A4F-PATB-D4ECB6F" + (10000 + i).ToString();
                    example.ProblemId = problem.Id;

                    problems.Add(problem);
                    examples.Add(example);

                    Console.WriteLine($"OK: {time.ElapsedMilliseconds / 1000.0}秒  {problemNo} - {problem.Title}");
                }
                catch (Exception e)
                {
                    // Best effort: a failed download or parse skips this problem only.
                    Console.WriteLine($"--> Error: {problemNo}  {e.Message} ");
                }
            }

            IWorkbook workbook = new HSSFWorkbook();
            workbook.CreateSheet(problems, "Problems");
            workbook.CreateSheet(examples, "Examples");

            string outputDirectory = Path.GetDirectoryName(outputPath);
            if (!string.IsNullOrEmpty(outputDirectory))
                Directory.CreateDirectory(outputDirectory);

            // File.Create truncates an existing file. File.OpenWrite would leave the tail of a
            // longer previous file in place and produce a corrupt workbook.
            using (var fs = File.Create(outputPath))
            {
                workbook.Write(fs);
                Console.WriteLine("下载成功！ -->" + outputPath);
            }
        }

        /// <summary>
        /// Collects the text of every element in a collection. An element whose tag name is
        /// "ul" or "ol" is not read directly; its children are processed recursively instead,
        /// so each child contributes its own entry.
        /// </summary>
        /// <param name="elems">The elements to read.</param>
        /// <returns>The collected texts, in the order the elements were visited.</returns>
        public static List<string> GetText(Elements elems)
        {
            var texts = new List<string>();
            int index = 0;
            while (index < elems.Count)
            {
                var current = elems[index];
                index++;

                string tagName = current.Tag.Name;
                bool isListContainer =
                    tagName.Equals("ul", StringComparison.CurrentCultureIgnoreCase) ||
                    tagName.Equals("ol", StringComparison.CurrentCultureIgnoreCase);

                if (!isListContainer)
                {
                    texts.Add(current.Text());
                    continue;
                }

                // List containers are flattened into one entry per child.
                texts.AddRange(GetText(current.Children));
            }
            return texts;
        }

        /// <summary>
        /// Downloads a range of problems from the NYOJ site and saves them to an Excel (.xls)
        /// file with a "Problems" and an "Examples" sheet.
        /// </summary>
        /// <param name="startId">Id (pid) of the first problem to download.</param>
        /// <param name="count">Number of consecutive problem ids to attempt, starting at <paramref name="startId"/>.</param>
        /// <returns>The number of problems that were downloaded and parsed successfully.</returns>
        public int DowloadNYOJ(int startId, int count)
        {
            const string baseurl = "http://acm.nyist.net/JudgeOnline/problem.php?pid=";
            const string outputPath = @"E:\Export\NYOJ_1_1320.xls";

            int result = 0;
            List<ExportProblemModel> problems = new List<ExportProblemModel>();
            List<ExportExampleModel> examples = new List<ExportExampleModel>();

            // Timer used for the progress output.
            Stopwatch time = Stopwatch.StartNew();

            // Exactly `count` ids are attempted: startId .. startId + count - 1.
            for (int offset = 0; offset < count; offset++)
            {
                int id = startId + offset;
                string url = baseurl + id;
                try
                {
                    // WebClient is IDisposable; dispose it as soon as the page has been fetched.
                    string pageHtml;
                    using (WebClient client = new WebClient())
                    {
                        // Credentials used to authenticate the request, should the server ask for them.
                        client.Credentials = CredentialCache.DefaultCredentials;
                        // The page is decoded as UTF-8; a GB2312 page would need a different encoding here.
                        pageHtml = Encoding.UTF8.GetString(client.DownloadData(url));
                    }

                    Document document = NSoupClient.Parse(pageHtml);
                    ExportProblemModel problem = new ExportProblemModel();
                    ExportExampleModel example = new ExportExampleModel();

                    // Fixed values and defaults are assigned first so that fields scraped from
                    // the page below (for example Tips) are not overwritten afterwards.
                    problem.CollectionCode = "NYOJ";
                    problem.Id = "A5AFA617-B1C3-474F-NYOJ-D4ECB6F" + (10000 + id).ToString();
                    problem.No = (1000 + id).ToString();
                    problem.Link = url;
                    problem.PassCount = "0";
                    problem.PassRate = "0";
                    problem.Score = "100";
                    problem.SubmitCount = "0";
                    problem.TestProcedure = "Standard";
                    problem.Tips = "";
                    problem.Convention = "";
                    problem.CodeLengthRestriction = "";
                    example.Id = "B5BFB637-A1C3-4A4F-9A70-D4ECB6F" + (10000 + id).ToString();
                    example.ProblemId = problem.Id;

                    problem.Title = document.Select("div.problem-display h2")[0].Text();

                    // The first span holds the time limit, the second the memory limit.
                    var limits = document.Select("div.problem-ins span");
                    problem.MemoryLimit = limits[1].Text();
                    problem.TimeLimit = limits[0].Text();

                    // dt elements carry the section labels and the dd at the same index its content.
                    var dds = document.Select("DL.problem-display dd");
                    var dts = document.Select("DL.problem-display dt");

                    for (int i = 0; i < dts.Count; i++)
                    {
                        switch (dts[i].Text())
                        {
                            case "描述":
                                problem.Description = dds[i].Text();
                                break;
                            case "输入":
                                problem.InputFormat = dds[i].Text();
                                break;
                            case "输出":
                                problem.OutputFormat = dds[i].Text();
                                break;
                            case "样例输入":
                                example.InputExample = dds[i].Text();
                                break;
                            case "样例输出":
                                example.OutputExample = dds[i].Text();
                                break;
                            case "提示":
                                problem.Tips = dds[i].Text();
                                break;
                            case "上传者":
                                problem.Author = dds[i].Text();
                                break;
                            case "来源":
                                problem.Keyword = dds[i].Text();
                                break;
                            default:
                                break;
                        }
                    }

                    problems.Add(problem);
                    examples.Add(example);

                    result++; // one more problem downloaded successfully
                    Console.WriteLine($"OK: {time.ElapsedMilliseconds / 1000.0}秒  {1000 + id} - {problem.Title}");
                }
                catch (Exception)
                {
                    // Best effort: any download or parse failure skips this problem only.
                    Console.WriteLine($"--> Error: {id}  题目信息不完整或无此题目！ ");
                }
            }

            IWorkbook workbook = new HSSFWorkbook();
            workbook.CreateSheet(problems, "Problems");
            workbook.CreateSheet(examples, "Examples");

            // File.Create truncates an existing file. File.OpenWrite would leave the tail of a
            // longer previous file in place and produce a corrupt workbook.
            using (var fs = File.Create(outputPath))
            {
                workbook.Write(fs);
                Console.WriteLine($"下载成功！共完成 {result} 条数据下载 -->" + outputPath);
            }
            return result;
        }

        /// <summary>
        /// Downloads NYOJ problem pages for pids 1001 to 1185 (1103 is skipped), extracts the
        /// problem fields by plain string searching and writes them to D:\NPOI.xls with a
        /// "Problems" and an "Examples" sheet.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the problem number is stored as 1000 + pid although pid already starts
        /// at 1001 - confirm that this numbering is intended.
        /// </remarks>
        public void nyoj()
        {
            const string path = "http://acm.nyist.net/JudgeOnline/problem.php?pid=";
            const string outputPath = @"D:\NPOI.xls";

            List<ExportProblemModel> problems = new List<ExportProblemModel>();
            List<ExportExampleModel> examples = new List<ExportExampleModel>();

            Stopwatch time = Stopwatch.StartNew();

            for (int i = 1001; i < 1186; i++)
            {
                if (i == 1103) continue;
                try
                {
                    // WebClient is IDisposable; dispose it as soon as the page has been fetched.
                    string html;
                    using (WebClient client = new WebClient())
                    {
                        // Credentials used to authenticate the request, should the server ask for them.
                        client.Credentials = CredentialCache.DefaultCredentials;
                        // The page is decoded as UTF-8; a GB2312 page would need a different encoding here.
                        html = Encoding.UTF8.GetString(client.DownloadData(path + i));
                    }

                    // Each step cuts the page down to the next marker, so the following
                    // Find("<dd>", "</dd>") reads the first <dd> after that marker. When a marker
                    // is missing IndexOf returns -1 and Substring throws; the catch below then
                    // skips the problem.
                    html = html.Substring(html.IndexOf("<H4></H4>", StringComparison.OrdinalIgnoreCase));
                    string title = html.Find("<H2>", "</H2>");
                    string timeLimit = html.Find("时间限制：<SPAN class=\"editable highlight\" id=\"problem[time_limit]\">", "</SPAN> ms");

                    // The "+ 1" is kept from the original code: with it a missing description
                    // marker does not throw (Substring(0)), unlike the other markers.
                    html = html.Substring(html.IndexOf("<dt>描述 </dt>", StringComparison.OrdinalIgnoreCase) + 1);
                    string description = html.Find("<dd>", "</dd>");

                    html = html.Substring(html.IndexOf("<dt>输入</dt>", StringComparison.OrdinalIgnoreCase));
                    string inputFormat = html.Find("<dd>", "</dd>");

                    html = html.Substring(html.IndexOf("<dt>输出</dt>", StringComparison.OrdinalIgnoreCase));
                    string outputFormat = html.Find("<dd>", "</dd>");

                    html = html.Substring(html.IndexOf("<dt>样例输入</dt>", StringComparison.OrdinalIgnoreCase));
                    string sampleInput = html.Find("<dd>", "</dd>");

                    html = html.Substring(html.IndexOf("<dt>样例输出</dt>", StringComparison.OrdinalIgnoreCase));
                    string sampleOutput = html.Find("<dd>", "</dd>");

                    html = html.Substring(html.IndexOf("<dt>上传者</dt>", StringComparison.OrdinalIgnoreCase));
                    string author = html.Find("<dd>", "</dd>");

                    Console.WriteLine($"{time.ElapsedMilliseconds}\t{i} -- {title} OK");

                    ExportProblemModel problem = new ExportProblemModel()
                    {
                        Id = Guid.NewGuid().ToString().ToUpper(),
                        CodeLengthRestriction = "8000",
                        CollectionCode = "NYOJ",
                        Convention = "",
                        Keyword = "",
                        MemoryLimit = "65536",
                        No = (1000 + i).ToString(),
                        PassCount = "0",
                        PassRate = "0",
                        Score = "100",
                        SubmitCount = "0",
                        TestProcedure = "Standard",
                        Tips = "",
                        Title = title,
                        Author = author,
                        Link = path + i,
                        Description = description,
                        TimeLimit = timeLimit,
                        InputFormat = inputFormat,
                        OutputFormat = outputFormat,
                    };

                    ExportExampleModel example = new ExportExampleModel()
                    {
                        Id = Guid.NewGuid().ToString().ToUpper(),
                        ProblemId = problem.Id,
                        InputExample = sampleInput,
                        OutputExample = sampleOutput,
                    };

                    problems.Add(problem);
                    examples.Add(example);
                }
                catch (Exception webEx)
                {
                    // Best effort: a failed download or a missing marker skips this problem only.
                    Console.WriteLine($"{i} -- Error");
                    Console.WriteLine(webEx.Message);
                }
            }

            IWorkbook workbook = new HSSFWorkbook();
            workbook.CreateSheet(problems, "Problems");
            workbook.CreateSheet(examples, "Examples");

            // File.Create truncates an existing file. File.OpenWrite would leave the tail of a
            // longer previous file in place and produce a corrupt workbook.
            using (var fs = File.Create(outputPath))
            {
                workbook.Write(fs);
                Console.WriteLine("生成成功");
            }
        }
    }

    /// <summary>
    /// String extension helpers for pulling fragments out of raw HTML text.
    /// </summary>
    public static class StringHelper
    {
        /// <summary>
        /// Returns the text between the first occurrence of <paramref name="begin"/> and the
        /// first occurrence of <paramref name="end"/> that follows it. Both markers are matched
        /// case-insensitively.
        /// </summary>
        /// <remarks>
        /// If the extracted fragment itself contains a sample_input / sample_output PRE element
        /// or an anchor (a '"&gt;' followed somewhere by '&lt;/a&gt;'), the content of that
        /// inner element is returned instead. Finally "&lt;br /&gt;" is turned into a newline
        /// and the entities "&amp;gt;" / "&amp;lt;" are decoded.
        /// </remarks>
        /// <param name="str">The text to search.</param>
        /// <param name="begin">Marker that precedes the wanted text.</param>
        /// <param name="end">Marker that follows the wanted text.</param>
        /// <returns>The extracted text, or an empty string when either marker is not found.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static string Find(this string str, string begin, string end)
        {
            if (str == null) throw new ArgumentNullException(nameof(str));
            if (begin == null) throw new ArgumentNullException(nameof(begin));
            if (end == null) throw new ArgumentNullException(nameof(end));

            const string sampleInputOpen = "<PRE id=\"sample_input\">";
            const string sampleOutputOpen = "<PRE id=\"sample_output\">";
            const string preClose = "</PRE>";
            const string anchorOpenEnd = "\">";
            const string anchorClose = "</a>";

            string result = "";

            // Ordinal matching guarantees the match is exactly begin.Length characters long,
            // which the offset arithmetic below relies on.
            int beginIndex = str.IndexOf(begin, StringComparison.OrdinalIgnoreCase);
            if (beginIndex >= 0)
            {
                int contentStart = beginIndex + begin.Length;
                // Search for the end marker only after the begin marker; an earlier occurrence
                // would otherwise yield a negative length.
                int endIndex = str.IndexOf(end, contentStart, StringComparison.OrdinalIgnoreCase);
                if (endIndex >= 0)
                    result = str.Substring(contentStart, endIndex - contentStart);
            }

            // Unwrap known inner elements; the recursion works on the extracted fragment, which
            // is strictly shorter than the input, so it always terminates.
            bool hasPreClose = result.IndexOf(preClose, StringComparison.Ordinal) >= 0;
            if (hasPreClose && result.IndexOf(sampleInputOpen, StringComparison.Ordinal) >= 0)
            {
                return result.Find(sampleInputOpen, preClose);
            }
            if (hasPreClose && result.IndexOf(sampleOutputOpen, StringComparison.Ordinal) >= 0)
            {
                return result.Find(sampleOutputOpen, preClose);
            }
            if (result.IndexOf(anchorOpenEnd, StringComparison.Ordinal) >= 0 &&
                result.IndexOf(anchorClose, StringComparison.Ordinal) >= 0)
            {
                return result.Find(anchorOpenEnd, anchorClose);
            }

            // Strings are immutable: Replace returns a new string that has to be kept.
            result = result.Replace("<br />", "\n");
            result = result.Replace("&gt;", ">");
            result = result.Replace("&lt;", "<");

            return result;
        }
    }
}
